Τhe relationship between obesity and socioeconomic factors and its geographical variation per London borough

Installing the libraries:

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## <U+221A> ggplot2 3.3.2     <U+221A> purrr   0.3.4
## <U+221A> tibble  3.0.4     <U+221A> dplyr   1.0.2
## <U+221A> tidyr   1.1.2     <U+221A> stringr 1.4.0
## <U+221A> readr   1.4.0     <U+221A> forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tmap)
library(geojsonio)
## 
## Attaching package: 'geojsonio'
## The following object is masked from 'package:base':
## 
##     pretty
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/Elena/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/Elena/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(broom)
library(crosstalk)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(sp)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(fs)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(broom)
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(GWmodel)
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: robustbase
## Loading required package: Rcpp
## Loading required package: spatialreg
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## Welcome to GWmodel version 2.2-2.
## The new version of GWmodel 2.2-2 now is ready
library(spatialreg)
library(corrr)
library(corrplot)
## corrplot 0.84 loaded
source("http://www.sthda.com/upload/rquery_cormat.r")

1. Importing the shapefile for Analysis

LondonBoroughs<-dir_info(here::here("statistical-gis-boundaries-london", 
                                 "ESRI"))%>%
  dplyr::filter(str_detect(path, 
                           "London_Borough_Excluding_MHW.shp$"))%>%
  dplyr::select(path)%>%
  pull()%>%
## read in the file in
  st_read()
## Reading layer `London_Borough_Excluding_MHW' from data source `C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\statistical-gis-boundaries-london\ESRI\London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## projected CRS:  OSGB 1936 / British National Grid
qtm(LondonBoroughs)

2. Reading the OLS regression data:

Datareg <- readxl::read_xlsx('overall.xlsx')
## basic statistics
summary(Datareg[3:8])
##  Percentage_of _Obese_Children_in_Year_6 Median_annual_income
##  Min.   :0.1226                          Min.   :23900       
##  1st Qu.:0.2153                          1st Qu.:27100       
##  Median :0.2352                          Median :28200       
##  Mean   :0.2267                          Mean   :30594       
##  3rd Qu.:0.2503                          3rd Qu.:32400       
##  Max.   :0.3003                          Max.   :60000       
##  Population_per_square_km
##  Min.   : 2216           
##  1st Qu.: 4577           
##  Median : 6049           
##  Mean   : 7594           
##  3rd Qu.:11167           
##  Max.   :16038           
##  Percentage_of_homes_with_deficiency_in_access_to_nature
##  Min.   :0.0947                                         
##  1st Qu.:0.1756                                         
##  Median :0.2593                                         
##  Mean   :0.2783                                         
##  3rd Qu.:0.3512                                         
##  Max.   :1.0000                                         
##  Fast_food_outlets_per_100_residents
##  Min.   :0.06400                    
##  1st Qu.:0.07970                    
##  Median :0.08960                    
##  Mean   :0.09446                    
##  3rd Qu.:0.10280                    
##  Max.   :0.17420                    
##  Percentage_with_no_qualifications_aged_25_64
##  Min.   :0.00000                             
##  1st Qu.:0.04500                             
##  Median :0.06100                             
##  Mean   :0.06148                             
##  3rd Qu.:0.07900                             
##  Max.   :0.11200
sd(Datareg$Percentage_with_no_qualifications_aged_25_64)
## [1] 0.02433377
sd(Datareg$`Percentage_of _Obese_Children_in_Year_6`)
## [1] 0.03772087

Merge boundaries and data:

LonBoroughs <- LondonBoroughs%>%
  left_join(.,
            Datareg, 
            by = c("GSS_CODE" = "Borough Code"))

Visualise the independent variables to observe their spatial variation:

tmap_mode("plot")
## tmap mode set to plotting
tm1 <- tm_shape(LonBoroughs) + 
  tm_polygons("Percentage_of _Obese_Children_in_Year_6", 
              style="jenks",
              palette="Reds")+
  tm_layout(main.title = "Chilhood Obesity in 2017-2018 per London Borough", main.title.position = "center", main.title.size = 1.4, legend.show=TRUE, legend.position=c("left","bottom"), legend.title.size = 0.8, legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata Β© Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm1

tmap_save(tm1, 'ChOb.png', dpi=300)
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\ChOb.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
tm2 <- tm_shape(LonBoroughs) + 
  tm_polygons("Median_annual_income", 
              style="jenks",
              palette="Blues")+
  tm_layout(main.title = "Personal Income by Tax Year 2017-2018", main.title.position = "center", main.title.size = 1.3, legend.show=TRUE, legend.position=c("left","bottom"), legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata Β© Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm2

tmap_save(tm2, 'Med_Income.png', dpi=300)
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\Med_Income.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
tm3 <- tm_shape(LonBoroughs) + 
  tm_polygons("Population_per_square_km", 
              style="quantile",
              palette="YlOrRd")+
  tm_layout(main.title = "Population Density in 2018 per London Borough", main.title.position = "center", main.title.size = 1.1, legend.show=TRUE, legend.position=c("left","bottom"), legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata Β© Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm3

tmap_save(tm3, 'Density.png', dpi=300)
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\Density.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
tm4 <- tm_shape(LonBoroughs) + 
  tm_polygons("Percentage_with_no_qualifications_aged_25_64", 
              style="quantile",
              palette="Purples")+
  tm_layout(main.title = "People aged 25-64 with no academic/professional qualifications in 2018", main.title.position = "center", main.title.size = 1.2, legend.show=TRUE, legend.position=c("left","bottom"), legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata Β© Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm4

tmap_save(tm4, 'Education.png', dpi=300)
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\Education.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
tm5 <- tm_shape(LonBoroughs) + 
  tm_polygons("Percentage_of_homes_with_deficiency_in_access_to_nature", 
              style="quantile",
              palette="Greens")+
  tm_layout(main.title = "Percentage of homes located within open space and access to nature in 2012", main.title.position = "center", main.title.size = 1.1, legend.show=TRUE, legend.position=c("left","bottom"),legend.title.size = 0.8,
            legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata Β© Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm5

tmap_save(tm5, 'Nature.png', dpi=300)
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\Nature.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
tm6 <- tm_shape(LonBoroughs) + 
  tm_polygons("Fast_food_outlets_per_100_residents", 
              style="quantile",
              palette="RdPu")+
  tm_layout(main.title = "Density of fast food outlets at 02/07/2018", main.title.position = "center", main.title.size = 1.1, legend.show=TRUE, legend.position=c("left","bottom"),legend.title.size = 0.9,
            legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata Β© Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm6

tmap_save(tm6, 'Fast_Food.png', dpi=300)
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\Fast_Food.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)

Run regression for model 1:

model1 <- lm(LonBoroughs$`Percentage_of _Obese_Children_in_Year_6` ~ 
            LonBoroughs$Median_annual_income + 
            LonBoroughs$Percentage_with_no_qualifications_aged_25_64 + 
            LonBoroughs$Population_per_square_km + 
            LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature+
            LonBoroughs$Fast_food_outlets_per_100_residents)
summary(model1)
## 
## Call:
## lm(formula = LonBoroughs$`Percentage_of _Obese_Children_in_Year_6` ~ 
##     LonBoroughs$Median_annual_income + LonBoroughs$Percentage_with_no_qualifications_aged_25_64 + 
##         LonBoroughs$Population_per_square_km + LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         LonBoroughs$Fast_food_outlets_per_100_residents)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.054430 -0.019171  0.001383  0.015530  0.053451 
## 
## Coefficients:
##                                                                       Estimate
## (Intercept)                                                          1.816e-01
## LonBoroughs$Median_annual_income                                    -1.944e-06
## LonBoroughs$Percentage_with_no_qualifications_aged_25_64             4.980e-01
## LonBoroughs$Population_per_square_km                                 2.054e-06
## LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature  8.959e-02
## LonBoroughs$Fast_food_outlets_per_100_residents                      3.534e-01
##                                                                     Std. Error
## (Intercept)                                                          4.510e-02
## LonBoroughs$Median_annual_income                                     1.307e-06
## LonBoroughs$Percentage_with_no_qualifications_aged_25_64             3.109e-01
## LonBoroughs$Population_per_square_km                                 1.539e-06
## LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature  4.243e-02
## LonBoroughs$Fast_food_outlets_per_100_residents                      2.940e-01
##                                                                     t value
## (Intercept)                                                           4.027
## LonBoroughs$Median_annual_income                                     -1.488
## LonBoroughs$Percentage_with_no_qualifications_aged_25_64              1.602
## LonBoroughs$Population_per_square_km                                  1.334
## LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature   2.112
## LonBoroughs$Fast_food_outlets_per_100_residents                       1.202
##                                                                     Pr(>|t|)
## (Intercept)                                                         0.000412
## LonBoroughs$Median_annual_income                                    0.148405
## LonBoroughs$Percentage_with_no_qualifications_aged_25_64            0.120770
## LonBoroughs$Population_per_square_km                                0.193285
## LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature 0.044110
## LonBoroughs$Fast_food_outlets_per_100_residents                     0.239683
##                                                                        
## (Intercept)                                                         ***
## LonBoroughs$Median_annual_income                                       
## LonBoroughs$Percentage_with_no_qualifications_aged_25_64               
## LonBoroughs$Population_per_square_km                                   
## LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature *  
## LonBoroughs$Fast_food_outlets_per_100_residents                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02865 on 27 degrees of freedom
## Multiple R-squared:  0.5131, Adjusted R-squared:  0.423 
## F-statistic: 5.691 on 5 and 27 DF,  p-value: 0.001039
tidy(model1)
## # A tibble: 6 x 5
##   term                                      estimate std.error statistic p.value
##   <chr>                                        <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)                                1.82e-1   4.51e-2      4.03 4.12e-4
## 2 LonBoroughs$Median_annual_income          -1.94e-6   1.31e-6     -1.49 1.48e-1
## 3 LonBoroughs$Percentage_with_no_qualifi~    4.98e-1   3.11e-1      1.60 1.21e-1
## 4 LonBoroughs$Population_per_square_km       2.05e-6   1.54e-6      1.33 1.93e-1
## 5 LonBoroughs$Percentage_of_homes_with_d~    8.96e-2   4.24e-2      2.11 4.41e-2
## 6 LonBoroughs$Fast_food_outlets_per_100_~    3.53e-1   2.94e-1      1.20 2.40e-1
glance(model1)
## # A tibble: 1 x 12
##   r.squared adj.r.squared  sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl>  <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.513         0.423 0.0287      5.69 0.00104     5   73.7 -133. -123.
## # ... with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Histograms and transformations

We’ll plot some histograms to check if the variables follow normal distribution:

hist(LonBoroughs$`Percentage_of _Obese_Children_in_Year_6`, 
     main="Figure 2: A histogram of the distribution of the Childhood Obesity variable", 
     xlab="% of Obese Children in Year 6", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)
lines(density(LonBoroughs$`Percentage_of _Obese_Children_in_Year_6`), col="red", lwd=3)

hist(LonBoroughs$Median_annual_income, 
     main="Figure 3: A histogram of median annual income variable", 
     xlab="median annual income", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughs$Median_annual_income), col="red", lwd=3)

hist(LonBoroughs$Population_per_square_km, 
     main="Figure 6: A histogram of population density variable", 
     xlab="population per square km", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughs$Population_per_square_km), col="red", lwd=3)

hist(LonBoroughs$Percentage_with_no_qualifications_aged_25_64, 
     main="Figure 9: A histogram of people without qualifications variable", 
     xlab="% of people with no qualifications aged 25-64", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughs$Percentage_with_no_qualifications_aged_25_64), col="red", lwd=3)

hist(LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature, 
     main="Figure 10: A histogram of access to nature variable", 
     xlab="% of home with deficient access to nature", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature), col="red", lwd=3)

hist(LonBoroughs$Fast_food_outlets_per_100_residents, 
     main="Figure 13: A histogram of density of Fast Food outlets variable", 
     xlab="Fast Food outlets per 100 residents", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughs$Fast_food_outlets_per_100_residents), col="red", lwd=3)

We’ll use symbox to help us find the appropriate transformation:

symbox(~`Percentage_of _Obese_Children_in_Year_6`, 
       LonBoroughs, 
       na.rm=T,
       powers=seq(-4,4,by=.5))

symbox(~Median_annual_income, 
       LonBoroughs, 
       na.rm=T,
       powers=seq(-4,4,by=.5))

symbox(~Population_per_square_km, 
       LonBoroughs, 
       na.rm=T,
       powers=seq(-4,4,by=.5))

symbox(~Percentage_of_homes_with_deficiency_in_access_to_nature, 
       LonBoroughs, 
       na.rm=T,
       powers=seq(-4,4,by=.5))

symbox(~Fast_food_outlets_per_100_residents, 
       LonBoroughs, 
       na.rm=T,
       powers=seq(-4,4,by=.5))

Transform data for new regression models:

Data_reg_trans <- readxl::read_xlsx('trans_reg.xlsx')
## Merge transformed data and boundaries
LonBoroughstrans <- LondonBoroughs%>%
  left_join(.,
            Data_reg_trans, 
            by = c("GSS_CODE" = "Borough Code"))

Plot histograms of transformed variables:

hist(LonBoroughstrans$`Median_annual_income _^-2.5`, 
     main="Figure 5: A histogram of (median annual income)^-2.5 variable", 
     xlab="(median annual income)^-2.5", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughstrans$`Median_annual_income _^-2.5`), col="red", lwd=3)

hist(LonBoroughstrans$log_Population_per_square_km, 
     main="Figure 8: A histogram of log population density variable", 
     xlab="log(population per square km)", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughstrans$log_Population_per_square_km), col="red", lwd=3)

hist((LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature)^-0.5, 
     main="Figure 12: A histogram of (access to nature)^-0.5 variable", 
     xlab="(% of home with deficient access to nature)^-0.5", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density((LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature)^-0.5), col="red", lwd=3)

hist(LonBoroughstrans$`Fast_food_outlets_per_100_residents_^-1`, 
     main="Figure 15: A histogram of density of (Fast Food outlets)^-1 variable", 
     xlab="(Fast Food outlets per 100 residents)^-1", 
     border="black", 
     col="white",
     las=1, 
     breaks=20, 
     freq = FALSE)

lines(density(LonBoroughstrans$`Fast_food_outlets_per_100_residents_^-1`), col="red", lwd=3)

Run regression for model 2:

model2 <- lm(Data_reg_trans$`Percentage_of _Obese_Children_in_Year_6` ~ 
              Data_reg_trans$`Median_annual_income _^-2.5` + 
              Data_reg_trans$Percentage_with_no_qualifications_aged_25_64 + 
              Data_reg_trans$log_Population_per_square_km + 
              Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature +
              Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`)
summary(model2)
## 
## Call:
## lm(formula = Data_reg_trans$`Percentage_of _Obese_Children_in_Year_6` ~ 
##     Data_reg_trans$`Median_annual_income _^-2.5` + Data_reg_trans$Percentage_with_no_qualifications_aged_25_64 + 
##         Data_reg_trans$log_Population_per_square_km + Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.047962 -0.011960 -0.000369  0.007922  0.045922 
## 
## Coefficients:
##                                                                          Estimate
## (Intercept)                                                            -2.503e-02
## Data_reg_trans$`Median_annual_income _^-2.5`                            8.969e+09
## Data_reg_trans$Percentage_with_no_qualifications_aged_25_64             2.238e-01
## Data_reg_trans$log_Population_per_square_km                             5.375e-02
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature  7.519e-02
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`               -4.507e-03
##                                                                        Std. Error
## (Intercept)                                                             9.309e-02
## Data_reg_trans$`Median_annual_income _^-2.5`                            2.285e+09
## Data_reg_trans$Percentage_with_no_qualifications_aged_25_64             2.214e-01
## Data_reg_trans$log_Population_per_square_km                             1.963e-02
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature  3.007e-02
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`                2.249e-03
##                                                                        t value
## (Intercept)                                                             -0.269
## Data_reg_trans$`Median_annual_income _^-2.5`                             3.926
## Data_reg_trans$Percentage_with_no_qualifications_aged_25_64              1.011
## Data_reg_trans$log_Population_per_square_km                              2.738
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature   2.501
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`                -2.004
##                                                                        Pr(>|t|)
## (Intercept)                                                            0.790093
## Data_reg_trans$`Median_annual_income _^-2.5`                           0.000538
## Data_reg_trans$Percentage_with_no_qualifications_aged_25_64            0.321110
## Data_reg_trans$log_Population_per_square_km                            0.010815
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature 0.018761
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`               0.055202
##                                                                           
## (Intercept)                                                               
## Data_reg_trans$`Median_annual_income _^-2.5`                           ***
## Data_reg_trans$Percentage_with_no_qualifications_aged_25_64               
## Data_reg_trans$log_Population_per_square_km                            *  
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature *  
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`               .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02271 on 27 degrees of freedom
## Multiple R-squared:  0.694,  Adjusted R-squared:  0.6374 
## F-statistic: 12.25 on 5 and 27 DF,  p-value: 2.935e-06
tidy(model2)
## # A tibble: 6 x 5
##   term                                      estimate std.error statistic p.value
##   <chr>                                        <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)                               -2.50e-2   9.31e-2    -0.269 7.90e-1
## 2 Data_reg_trans$`Median_annual_income _^-~  8.97e+9   2.28e+9     3.93  5.38e-4
## 3 Data_reg_trans$Percentage_with_no_qualif~  2.24e-1   2.21e-1     1.01  3.21e-1
## 4 Data_reg_trans$log_Population_per_square~  5.37e-2   1.96e-2     2.74  1.08e-2
## 5 Data_reg_trans$Percentage_of_homes_with_~  7.52e-2   3.01e-2     2.50  1.88e-2
## 6 Data_reg_trans$`Fast_food_outlets_per_10~ -4.51e-3   2.25e-3    -2.00  5.52e-2
glance(model2)
## # A tibble: 1 x 12
##   r.squared adj.r.squared  sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl>  <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.694         0.637 0.0227      12.2 2.94e-6     5   81.4 -149. -138.
## # ... with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

We’ll exclude the statistically insignificant variable and re-run the regression.

Run regression for model 3:

model3 <- lm(Data_reg_trans$`Percentage_of _Obese_Children_in_Year_6` ~ 
              Data_reg_trans$`Median_annual_income _^-2.5` + 
              Data_reg_trans$log_Population_per_square_km + 
              Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature +
              Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`)
summary(model3)
## 
## Call:
## lm(formula = Data_reg_trans$`Percentage_of _Obese_Children_in_Year_6` ~ 
##     Data_reg_trans$`Median_annual_income _^-2.5` + Data_reg_trans$log_Population_per_square_km + 
##         Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.050502 -0.012735  0.000638  0.011500  0.050048 
## 
## Coefficients:
##                                                                          Estimate
## (Intercept)                                                            -3.911e-02
## Data_reg_trans$`Median_annual_income _^-2.5`                            1.044e+10
## Data_reg_trans$log_Population_per_square_km                             5.943e-02
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature  7.544e-02
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`               -4.873e-03
##                                                                        Std. Error
## (Intercept)                                                             9.208e-02
## Data_reg_trans$`Median_annual_income _^-2.5`                            1.762e+09
## Data_reg_trans$log_Population_per_square_km                             1.882e-02
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature  3.008e-02
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`                2.220e-03
##                                                                        t value
## (Intercept)                                                             -0.425
## Data_reg_trans$`Median_annual_income _^-2.5`                             5.925
## Data_reg_trans$log_Population_per_square_km                              3.158
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature   2.508
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`                -2.195
##                                                                        Pr(>|t|)
## (Intercept)                                                             0.67426
## Data_reg_trans$`Median_annual_income _^-2.5`                           2.24e-06
## Data_reg_trans$log_Population_per_square_km                             0.00379
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature  0.01820
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`                0.03662
##                                                                           
## (Intercept)                                                               
## Data_reg_trans$`Median_annual_income _^-2.5`                           ***
## Data_reg_trans$log_Population_per_square_km                            ** 
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature *  
## Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`               *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02272 on 28 degrees of freedom
## Multiple R-squared:  0.6825, Adjusted R-squared:  0.6371 
## F-statistic: 15.04 on 4 and 28 DF,  p-value: 1.118e-06
tidy(model3)
## # A tibble: 5 x 5
##   term                                     estimate std.error statistic  p.value
##   <chr>                                       <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)                             -3.91e- 2   9.21e-2    -0.425  6.74e-1
## 2 Data_reg_trans$`Median_annual_income _~  1.04e+10   1.76e+9     5.92   2.24e-6
## 3 Data_reg_trans$log_Population_per_squa~  5.94e- 2   1.88e-2     3.16   3.79e-3
## 4 Data_reg_trans$Percentage_of_homes_wit~  7.54e- 2   3.01e-2     2.51   1.82e-2
## 5 Data_reg_trans$`Fast_food_outlets_per_~ -4.87e- 3   2.22e-3    -2.20   3.66e-2
glance(model3)
## # A tibble: 1 x 12
##   r.squared adj.r.squared  sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl>  <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.682         0.637 0.0227      15.0 1.12e-6     4   80.8 -150. -141.
## # ... with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Regression Assumptions for model 3:

a. Plot regression fit line

q <- qplot(x = `Median_annual_income _^-2.5`, 
           y = `Percentage_of _Obese_Children_in_Year_6`, 
           data=LonBoroughstrans)
q + stat_smooth(method="lm", se=FALSE, size=1) + 
  geom_jitter()
## `geom_smooth()` using formula 'y ~ x'

q1 <- qplot(x = log_Population_per_square_km, 
           y = `Percentage_of _Obese_Children_in_Year_6`, 
           data=LonBoroughstrans)
q1 + stat_smooth(method="lm", se=FALSE, size=1) + 
  geom_jitter()
## `geom_smooth()` using formula 'y ~ x'

q2 <- qplot(x = Percentage_of_homes_with_deficiency_in_access_to_nature, 
           y = `Percentage_of _Obese_Children_in_Year_6`, 
           data=LonBoroughstrans)
q2 + stat_smooth(method="lm", se=FALSE, size=1) + 
  geom_jitter()
## `geom_smooth()` using formula 'y ~ x'

q3 <- qplot(x = `Fast_food_outlets_per_100_residents_^-1`, 
           y = `Percentage_of _Obese_Children_in_Year_6`, 
           data=LonBoroughstrans)
q3 + stat_smooth(method="lm", se=FALSE, size=1) + 
  geom_jitter()
## `geom_smooth()` using formula 'y ~ x'

b. Check the residuals into our dataframe

# Save the residuals into our dataframe
model_data <- model3 %>%
  augment(., Data_reg_trans)
# Add them to the shapelayer
LonBoroughstrans <- LonBoroughstrans %>%
  mutate(model3resids = residuals(model3))
# Plot residuals to see if they follow normal distribution
model_data%>%
  dplyr::select(.resid)%>%
  pull()%>%
  qplot()+ 
  geom_histogram(bins=30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

c. Visualisation of the correlation matrix

# library corrplot + rquery.cormat
rquery.cormat(Data_reg_trans[4:7], type='full',
              graph=TRUE, graphType="correlogram")

## $r
##                                                         Median_annual_income _^-2.5
## Median_annual_income _^-2.5                                                    1.00
## Fast_food_outlets_per_100_residents_^-1                                        0.23
## log_Population_per_square_km                                                  -0.11
## Percentage_of_homes_with_deficiency_in_access_to_nature                       -0.21
##                                                         Fast_food_outlets_per_100_residents_^-1
## Median_annual_income _^-2.5                                                                0.23
## Fast_food_outlets_per_100_residents_^-1                                                    1.00
## log_Population_per_square_km                                                              -0.35
## Percentage_of_homes_with_deficiency_in_access_to_nature                                   -0.47
##                                                         log_Population_per_square_km
## Median_annual_income _^-2.5                                                    -0.11
## Fast_food_outlets_per_100_residents_^-1                                        -0.35
## log_Population_per_square_km                                                    1.00
## Percentage_of_homes_with_deficiency_in_access_to_nature                        -0.16
##                                                         Percentage_of_homes_with_deficiency_in_access_to_nature
## Median_annual_income _^-2.5                                                                               -0.21
## Fast_food_outlets_per_100_residents_^-1                                                                   -0.47
## log_Population_per_square_km                                                                              -0.16
## Percentage_of_homes_with_deficiency_in_access_to_nature                                                    1.00
## 
## $p
##                                                         Median_annual_income _^-2.5
## Median_annual_income _^-2.5                                                    0.00
## Fast_food_outlets_per_100_residents_^-1                                        0.20
## log_Population_per_square_km                                                   0.55
## Percentage_of_homes_with_deficiency_in_access_to_nature                        0.25
##                                                         Fast_food_outlets_per_100_residents_^-1
## Median_annual_income _^-2.5                                                              0.2000
## Fast_food_outlets_per_100_residents_^-1                                                  0.0000
## log_Population_per_square_km                                                             0.0490
## Percentage_of_homes_with_deficiency_in_access_to_nature                                  0.0059
##                                                         log_Population_per_square_km
## Median_annual_income _^-2.5                                                    0.550
## Fast_food_outlets_per_100_residents_^-1                                        0.049
## log_Population_per_square_km                                                   0.000
## Percentage_of_homes_with_deficiency_in_access_to_nature                        0.380
##                                                         Percentage_of_homes_with_deficiency_in_access_to_nature
## Median_annual_income _^-2.5                                                                              0.2500
## Fast_food_outlets_per_100_residents_^-1                                                                  0.0059
## log_Population_per_square_km                                                                             0.3800
## Percentage_of_homes_with_deficiency_in_access_to_nature                                                  0.0000
## 
## $sym
##                                                         Median_annual_income _^-2.5
## Median_annual_income _^-2.5                             1                          
## Fast_food_outlets_per_100_residents_^-1                                            
## log_Population_per_square_km                                                       
## Percentage_of_homes_with_deficiency_in_access_to_nature                            
##                                                         Fast_food_outlets_per_100_residents_^-1
## Median_annual_income _^-2.5                                                                    
## Fast_food_outlets_per_100_residents_^-1                 1                                      
## log_Population_per_square_km                            .                                      
## Percentage_of_homes_with_deficiency_in_access_to_nature .                                      
##                                                         log_Population_per_square_km
## Median_annual_income _^-2.5                                                         
## Fast_food_outlets_per_100_residents_^-1                                             
## log_Population_per_square_km                            1                           
## Percentage_of_homes_with_deficiency_in_access_to_nature                             
##                                                         Percentage_of_homes_with_deficiency_in_access_to_nature
## Median_annual_income _^-2.5                                                                                    
## Fast_food_outlets_per_100_residents_^-1                                                                        
## log_Population_per_square_km                                                                                   
## Percentage_of_homes_with_deficiency_in_access_to_nature 1                                                      
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
vif(model1)
##                                    LonBoroughs$Median_annual_income 
##                                                            3.027936 
##            LonBoroughs$Percentage_with_no_qualifications_aged_25_64 
##                                                            2.230273 
##                                LonBoroughs$Population_per_square_km 
##                                                            1.537062 
## LonBoroughs$Percentage_of_homes_with_deficiency_in_access_to_nature 
##                                                            1.923653 
##                     LonBoroughs$Fast_food_outlets_per_100_residents 
##                                                            1.911991
vif(model2)
##                           Data_reg_trans$`Median_annual_income _^-2.5` 
##                                                               1.811432 
##            Data_reg_trans$Percentage_with_no_qualifications_aged_25_64 
##                                                               1.800544 
##                            Data_reg_trans$log_Population_per_square_km 
##                                                               1.465018 
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature 
##                                                               1.537162 
##               Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1` 
##                                                               1.731994
vif(model3)
##                           Data_reg_trans$`Median_annual_income _^-2.5` 
##                                                               1.076685 
##                            Data_reg_trans$log_Population_per_square_km 
##                                                               1.344984 
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature 
##                                                               1.537056 
##               Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1` 
##                                                               1.686878

d. Print some model diagnositcs for homoscedasticity

par(mfrow=c(2,2))    
#plot to 2 by 2 array
plot(model3)

e.Independence of errors

# Standard autocorrelation: run durbin-watson test
DW <- durbinWatsonTest(model3)
tidy(DW)
## # A tibble: 1 x 5
##   statistic p.value autocorrelation method             alternative
##       <dbl>   <dbl>           <dbl> <chr>              <chr>      
## 1      2.19   0.644          -0.140 Durbin-Watson Test two.sided
# Plot the residuals to check spatial autocorrelation
tmap_mode("plot")
## tmap mode set to plotting
tm_res <- tm_shape(LonBoroughstrans) +
  tm_polygons("model3resids", palette = "RdYlBu")+
tm_layout(main.title = "Map of residuals of Model 3", main.title.position = "center", main.title.size = 1.4, 
          legend.show=TRUE, legend.position=c("left","bottom"), legend.title.size = 0.9, legend.frame=TRUE)+
  tm_scale_bar(position=c("right", "top"), text.size=0.6)+
  tm_compass(north=0,type="8star", position=c("left", "top"))+
  tm_credits("Contains National Statistics\nand Ordnance Survey\ndata © Crown copyright\nand database right [2015]\nDepartment: CASA-UCL\nCourse:GIS, Professors:\nA.Denett & A.Maclachlan\nCreated by: Eleni Kalantzi\nDate: 27/12/20", position=c("right", "bottom"))
tm_res
## Variable(s) "model3resids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tm_res, 'Static_resid_3.png', dpi=300)
## Variable(s) "model3resids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to C:\Users\Elena\OneDrive - University College London\CASA_2020\modules\GIS\final_coursework\Reg\GIS_Coursework\Static_resid_3.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)

Start calculating Moran’s I:

# Calculate the centroids of all Boroughs in London
coordsB <- LondonBoroughs%>%
  st_centroid()%>%
  st_geometry()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
plot(coordsB,axes=TRUE)

# create a neighbours list
LB_nb <- LonBoroughstrans %>%
  poly2nb(., queen=T)

# and 4 nearest neighbours
knn_boroughs <-coordsB %>%
  knearneigh(., k=4)
LB_knn <- knn_boroughs %>%
  knn2nb()

# then plot them
plot(LB_nb, st_geometry(coordsB), col="red")
plot(LondonBoroughs$geometry, add=T)

plot(LB_knn, st_geometry(coordsB), col="blue")
plot(LondonBoroughs$geometry, add=T)

# Create a spatial weights matrix object from these weights

LB.queens_weight <- LB_nb %>%
  nb2listw(., style="C")

LB.knn_4_weight <- LB_knn %>%
  nb2listw(., style="C")

LB.knn_4_weight
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 33 
## Number of nonzero links: 132 
## Percentage nonzero weights: 12.12121 
## Average number of links: 4 
## Non-symmetric neighbours list
## 
## Weights style: C 
## Weights constants summary:
##    n   nn S0    S1  S2
## C 33 1089 33 14.25 138
LB.queens_weight
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 33 
## Number of nonzero links: 136 
## Percentage nonzero weights: 12.48852 
## Average number of links: 4.121212 
## 
## Weights style: C 
## Weights constants summary:
##    n   nn S0       S1       S2
## C 33 1089 33 16.01471 143.6613
# Moran's I for Queen and Knn4 for residuals
Queen <- LonBoroughstrans %>%
  st_drop_geometry()%>%
  dplyr::select(model3resids)%>%
  pull()%>%
  moran.test(., LB.queens_weight)%>%
  tidy()
Nearest_neighbour <- LonBoroughstrans %>%
  st_drop_geometry()%>%
  dplyr::select(model3resids)%>%
  pull()%>%
  moran.test(., LB.knn_4_weight)%>%
  tidy()
Queen
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1   -0.0810   -0.0312    0.0123    -0.448   0.673 Moran I test unde~ greater
Nearest_neighbour
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1   -0.0691   -0.0312    0.0109    -0.362   0.641 Moran I test unde~ greater
# Moran's I for model 3
lm.morantest(model3, LB.knn_4_weight)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Data_reg_trans$`Percentage_of
## _Obese_Children_in_Year_6` ~ Data_reg_trans$`Median_annual_income
## _^-2.5` + Data_reg_trans$log_Population_per_square_km +
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature
## + Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`)
## weights: LB.knn_4_weight
## 
## Moran I statistic standard deviate = -0.37084, p-value = 0.6446
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      -0.06905526      -0.02996277       0.01111244
lm.morantest(model3, LB.queens_weight)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Data_reg_trans$`Percentage_of
## _Obese_Children_in_Year_6` ~ Data_reg_trans$`Median_annual_income
## _^-2.5` + Data_reg_trans$log_Population_per_square_km +
## Data_reg_trans$Percentage_of_homes_with_deficiency_in_access_to_nature
## + Data_reg_trans$`Fast_food_outlets_per_100_residents_^-1`)
## weights: LB.queens_weight
## 
## Moran I statistic standard deviate = -0.40157, p-value = 0.656
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      -0.08102434      -0.03573773       0.01271824

3. Spatial Regression models

We’ll run regression for Spatial lag model first:

# Spatial lag model for queen
slag_dv_model3_queen <- lagsarlm(`Percentage_of _Obese_Children_in_Year_6` ~ `Median_annual_income _^-2.5` + 
                                   log_Population_per_square_km +
                                   Percentage_of_homes_with_deficiency_in_access_to_nature +
                                   `Fast_food_outlets_per_100_residents_^-1`,
                                  data = LonBoroughstrans,
                                  tol.solve=4.39317e-27,
                                  nb2listw(LB_nb, style="C"),
                                  method = "eigen")
tidy(slag_dv_model3_queen)
## # A tibble: 6 x 5
##   term                                     estimate std.error statistic  p.value
##   <chr>                                       <dbl>     <dbl>     <dbl>    <dbl>
## 1 rho                                     -9.60e- 3   5.03e-2    -0.191 8.49e- 1
## 2 (Intercept)                             -4.03e- 2   8.49e-2    -0.475 6.35e- 1
## 3 `Median_annual_income _^-2.5`            1.05e+10   1.63e+9     6.44  1.21e-10
## 4 log_Population_per_square_km             6.02e- 2   1.77e-2     3.40  6.83e- 4
## 5 Percentage_of_homes_with_deficiency_in~  7.64e- 2   2.80e-2     2.73  6.35e- 3
## 6 `Fast_food_outlets_per_100_residents_^~ -4.88e- 3   2.04e-3    -2.39  1.69e- 2
glance(slag_dv_model3_queen)
## # A tibble: 1 x 6
##   r.squared   AIC   BIC deviance logLik  nobs
##       <dbl> <dbl> <dbl>    <dbl>  <dbl> <int>
## 1     0.683 -148. -137.   0.0144   80.8    33
summary(slag_dv_model3_queen)
## 
## Call:lagsarlm(formula = `Percentage_of _Obese_Children_in_Year_6` ~ 
##     `Median_annual_income _^-2.5` + log_Population_per_square_km + 
##         Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         `Fast_food_outlets_per_100_residents_^-1`, data = LonBoroughstrans, 
##     listw = nb2listw(LB_nb, style = "C"), method = "eigen", tol.solve = 4.39317e-27)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.05087565 -0.01279994 -0.00012501  0.01126475  0.05046705 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                                                            Estimate  Std. Error
## (Intercept)                                             -4.0342e-02  8.4900e-02
## `Median_annual_income _^-2.5`                            1.0466e+10  1.6255e+09
## log_Population_per_square_km                             6.0230e-02  1.7734e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  7.6398e-02  2.7995e-02
## `Fast_food_outlets_per_100_residents_^-1`               -4.8814e-03  2.0441e-03
##                                                         z value  Pr(>|z|)
## (Intercept)                                             -0.4752 0.6346645
## `Median_annual_income _^-2.5`                            6.4386 1.206e-10
## log_Population_per_square_km                             3.3964 0.0006829
## Percentage_of_homes_with_deficiency_in_access_to_nature  2.7290 0.0063530
## `Fast_food_outlets_per_100_residents_^-1`               -2.3881 0.0169355
## 
## Rho: -0.0095987, LR test value: 0.035269, p-value: 0.85103
## Asymptotic standard error: 0.050333
##     z-value: -0.1907, p-value: 0.84876
## Wald statistic: 0.036368, p-value: 0.84876
## 
## Log likelihood: 80.7873 for lag model
## ML residual variance (sigma squared): 0.00043765, (sigma: 0.02092)
## Number of observations: 33 
## Number of parameters estimated: 7 
## AIC: -147.57, (AIC for lm: -149.54)
## LM test for residual autocorrelation
## test value: 9.8723e-07, p-value: 0.99921
## Spatial lag model for knn-4
slag_dv_model3_knn4 <- lagsarlm(`Percentage_of _Obese_Children_in_Year_6` ~ `Median_annual_income _^-2.5` + 
                                  log_Population_per_square_km +
                                  Percentage_of_homes_with_deficiency_in_access_to_nature +
                                  `Fast_food_outlets_per_100_residents_^-1`,
                                 data = LonBoroughstrans,
                                 tol.solve = 2.46661e-27,
                                 nb2listw(LB_knn, style="C"), 
                                 method = "eigen")
tidy(slag_dv_model3_knn4)
## # A tibble: 6 x 5
##   term                                   estimate std.error statistic    p.value
##   <chr>                                     <dbl>     <dbl>     <dbl>      <dbl>
## 1 rho                                     2.70e-1   1.43e-1      1.89    5.86e-2
## 2 (Intercept)                            -8.73e-2   8.55e-2     -1.02    3.07e-1
## 3 `Median_annual_income _^-2.5`           9.61e+9   1.60e+9      6.02    1.75e-9
## 4 log_Population_per_square_km            5.66e-2   1.64e-2      3.45    5.54e-4
## 5 Percentage_of_homes_with_deficiency_i~  6.66e-2   2.65e-2      2.51    1.19e-2
## 6 `Fast_food_outlets_per_100_residents_~ -4.46e-3   1.94e-3     -2.30    2.13e-2
glance(slag_dv_model3_knn4)
## # A tibble: 1 x 6
##   r.squared   AIC   BIC deviance logLik  nobs
##       <dbl> <dbl> <dbl>    <dbl>  <dbl> <int>
## 1     0.716 -151. -140.   0.0129   82.4    33
summary(slag_dv_model3_knn4)
## 
## Call:lagsarlm(formula = `Percentage_of _Obese_Children_in_Year_6` ~ 
##     `Median_annual_income _^-2.5` + log_Population_per_square_km + 
##         Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         `Fast_food_outlets_per_100_residents_^-1`, data = LonBoroughstrans, 
##     listw = nb2listw(LB_knn, style = "C"), method = "eigen", 
##     tol.solve = 2.46661e-27)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0387843 -0.0128332 -0.0026774  0.0155328  0.0383323 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                                                            Estimate  Std. Error
## (Intercept)                                             -8.7341e-02  8.5472e-02
## `Median_annual_income _^-2.5`                            9.6069e+09  1.5961e+09
## log_Population_per_square_km                             5.6630e-02  1.6399e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  6.6642e-02  2.6503e-02
## `Fast_food_outlets_per_100_residents_^-1`               -4.4645e-03  1.9383e-03
##                                                         z value  Pr(>|z|)
## (Intercept)                                             -1.0219 0.3068455
## `Median_annual_income _^-2.5`                            6.0190 1.755e-09
## log_Population_per_square_km                             3.4533 0.0005538
## Percentage_of_homes_with_deficiency_in_access_to_nature  2.5145 0.0119206
## `Fast_food_outlets_per_100_residents_^-1`               -2.3033 0.0212636
## 
## Rho: 0.26967, LR test value: 3.1913, p-value: 0.074032
## Asymptotic standard error: 0.1426
##     z-value: 1.8911, p-value: 0.058606
## Wald statistic: 3.5764, p-value: 0.058606
## 
## Log likelihood: 82.36531 for lag model
## ML residual variance (sigma squared): 0.00039205, (sigma: 0.0198)
## Number of observations: 33 
## Number of parameters estimated: 7 
## AIC: -150.73, (AIC for lm: -149.54)
## LM test for residual autocorrelation
## test value: 3.8276, p-value: 0.050415

We’ll check the residuals:

# Residuals from spatial lag model
LonBoroughstrans <- LonBoroughstrans %>%
  mutate(slag_dv_model3_knn_resids = residuals(slag_dv_model3_knn4))

KNN4Moran <- LonBoroughstrans %>%
  st_drop_geometry()%>%
  dplyr::select(slag_dv_model3_knn_resids)%>%
  pull()%>%
  moran.test(., LB.knn_4_weight)%>%
  tidy()

KNN4Moran
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1    -0.177   -0.0312    0.0113     -1.37   0.915 Moran I test unde~ greater
LonBoroughstrans <- LonBoroughstrans %>%
  mutate(slag_dv_model3_queen_resids = residuals(slag_dv_model3_queen))

QueenMoran <- LonBoroughstrans %>%
  st_drop_geometry()%>%
  dplyr::select(slag_dv_model3_queen_resids)%>%
  pull()%>%
  moran.test(., LB.queens_weight)%>%
  tidy()

QueenMoran
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1  0.000118   -0.0312    0.0123     0.283   0.389 Moran I test unde~ greater

Then, we’ll run Spatial Error Model:

# SEM for Queen
SEM_model3_queen <- errorsarlm(`Percentage_of _Obese_Children_in_Year_6` ~ `Median_annual_income _^-2.5` + 
                                 log_Population_per_square_km +
                                 Percentage_of_homes_with_deficiency_in_access_to_nature +
                                 `Fast_food_outlets_per_100_residents_^-1`,
                                  data = LonBoroughstrans,
                                  tol.solve = 3.19139e-27,
                                 nb2listw(LB_nb, style="C"),
                                 method = "eigen")
tidy(SEM_model3_queen)
## # A tibble: 6 x 5
##   term                                     estimate std.error statistic  p.value
##   <chr>                                       <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)                             -3.93e- 2   8.48e-2  -0.463   6.43e- 1
## 2 `Median_annual_income _^-2.5`            1.04e+10   1.62e+9   6.44    1.22e-10
## 3 log_Population_per_square_km             5.95e- 2   1.73e-2   3.43    6.01e- 4
## 4 Percentage_of_homes_with_deficiency_in~  7.55e- 2   2.77e-2   2.73    6.41e- 3
## 5 `Fast_food_outlets_per_100_residents_^~ -4.88e- 3   2.04e-3  -2.38    1.71e- 2
## 6 lambda                                  -2.17e- 3   2.50e-1  -0.00866 9.93e- 1
glance(SEM_model3_queen)
## # A tibble: 1 x 6
##   r.squared   AIC   BIC deviance logLik  nobs
##       <dbl> <dbl> <dbl>    <dbl>  <dbl> <int>
## 1     0.682 -148. -137.   0.0145   80.8    33
summary(SEM_model3_queen)
## 
## Call:errorsarlm(formula = `Percentage_of _Obese_Children_in_Year_6` ~ 
##     `Median_annual_income _^-2.5` + log_Population_per_square_km + 
##         Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         `Fast_food_outlets_per_100_residents_^-1`, data = LonBoroughstrans, 
##     listw = nb2listw(LB_nb, style = "C"), method = "eigen", tol.solve = 3.19139e-27)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0505145 -0.0127568  0.0006266  0.0115140  0.0500627 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                                                            Estimate  Std. Error
## (Intercept)                                             -3.9251e-02  8.4785e-02
## `Median_annual_income _^-2.5`                            1.0443e+10  1.6223e+09
## log_Population_per_square_km                             5.9462e-02  1.7329e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  7.5513e-02  2.7699e-02
## `Fast_food_outlets_per_100_residents_^-1`               -4.8759e-03  2.0448e-03
##                                                         z value  Pr(>|z|)
## (Intercept)                                             -0.4630 0.6433995
## `Median_annual_income _^-2.5`                            6.4370 1.219e-10
## log_Population_per_square_km                             3.4313 0.0006006
## Percentage_of_homes_with_deficiency_in_access_to_nature  2.7262 0.0064066
## `Fast_food_outlets_per_100_residents_^-1`               -2.3845 0.0171024
## 
## Lambda: -0.0021665, LR test value: 5.1147e-05, p-value: 0.99429
## Asymptotic standard error: 0.25009
##     z-value: -0.0086627, p-value: 0.99309
## Wald statistic: 7.5043e-05, p-value: 0.99309
## 
## Log likelihood: 80.7697 for error model
## ML residual variance (sigma squared): 0.00043812, (sigma: 0.020931)
## Number of observations: 33 
## Number of parameters estimated: 7 
## AIC: -147.54, (AIC for lm: -149.54)
## SEM for KNN4
SEM_model3_knn4 <- errorsarlm(`Percentage_of _Obese_Children_in_Year_6` ~ `Median_annual_income _^-2.5` + 
                                log_Population_per_square_km +
                                Percentage_of_homes_with_deficiency_in_access_to_nature +
                                `Fast_food_outlets_per_100_residents_^-1`,
                              data = LonBoroughstrans,
                              tol.solve = 3.19139e-27,
                               nb2listw(LB_knn, style="C"),
                               method = "eigen")
tidy(SEM_model3_knn4)
## # A tibble: 6 x 5
##   term                                     estimate std.error statistic  p.value
##   <chr>                                       <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)                             -6.94e- 2   8.21e-2    -0.845 3.98e- 1
## 2 `Median_annual_income _^-2.5`            1.11e+10   1.44e+9     7.69  1.53e-14
## 3 log_Population_per_square_km             6.62e- 2   1.65e-2     4.00  6.25e- 5
## 4 Percentage_of_homes_with_deficiency_in~  8.50e- 2   2.72e-2     3.13  1.76e- 3
## 5 `Fast_food_outlets_per_100_residents_^~ -5.12e- 3   2.03e-3    -2.52  1.16e- 2
## 6 lambda                                  -2.63e- 1   2.82e-1    -0.935 3.50e- 1
glance(SEM_model3_knn4)
## # A tibble: 1 x 6
##   r.squared   AIC   BIC deviance logLik  nobs
##       <dbl> <dbl> <dbl>    <dbl>  <dbl> <int>
## 1     0.692 -148. -138.   0.0141   81.0    33
summary(SEM_model3_knn4)
## 
## Call:errorsarlm(formula = `Percentage_of _Obese_Children_in_Year_6` ~ 
##     `Median_annual_income _^-2.5` + log_Population_per_square_km + 
##         Percentage_of_homes_with_deficiency_in_access_to_nature + 
##         `Fast_food_outlets_per_100_residents_^-1`, data = LonBoroughstrans, 
##     listw = nb2listw(LB_knn, style = "C"), method = "eigen", 
##     tol.solve = 3.19139e-27)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0510293 -0.0099604 -0.0024567  0.0091206  0.0501923 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                                                            Estimate  Std. Error
## (Intercept)                                             -6.9351e-02  8.2093e-02
## `Median_annual_income _^-2.5`                            1.1097e+10  1.4439e+09
## log_Population_per_square_km                             6.6198e-02  1.6536e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  8.5043e-02  2.7185e-02
## `Fast_food_outlets_per_100_residents_^-1`               -5.1189e-03  2.0286e-03
##                                                         z value  Pr(>|z|)
## (Intercept)                                             -0.8448  0.398227
## `Median_annual_income _^-2.5`                            7.6851 1.532e-14
## log_Population_per_square_km                             4.0033 6.247e-05
## Percentage_of_homes_with_deficiency_in_access_to_nature  3.1284  0.001758
## `Fast_food_outlets_per_100_residents_^-1`               -2.5234  0.011623
## 
## Lambda: -0.26334, LR test value: 0.44141, p-value: 0.50644
## Asymptotic standard error: 0.28168
##     z-value: -0.9349, p-value: 0.34984
## Wald statistic: 0.87403, p-value: 0.34984
## 
## Log likelihood: 80.99038 for error model
## ML residual variance (sigma squared): 0.00042717, (sigma: 0.020668)
## Number of observations: 33 
## Number of parameters estimated: 7 
## AIC: -147.98, (AIC for lm: -149.54)

4. GWR model

# Set projections
st_crs(LonBoroughstrans) = 27700
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
LonBoroughsSPtrans <- LonBoroughstrans %>%
  as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum OSGB 1936 in CRS definition
st_crs(coordsB) = 27700
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
coordsBSP <- coordsB %>%
  as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum OSGB 1936 in CRS definition
coordsBSP
## class       : SpatialPoints 
## features    : 33 
## extent      : 507889.7, 554049, 163541.2, 196420.8  (xmin, xmax, ymin, ymax)
## crs         : +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs
# calculate bandwitch
GWRbandwidthtrans <- gwr.sel(Percentage_of._Obese_Children_in_Year_6 ~ Median_annual_income._..2.5 + 
                          log_Population_per_square_km +
                          Percentage_of_homes_with_deficiency_in_access_to_nature +
                          Fast_food_outlets_per_100_residents_..1,
                        data = LonBoroughsSPtrans, 
                        coords=coordsBSP,
                        adapt=T)
## Warning in gwr.sel(Percentage_of._Obese_Children_in_Year_6 ~
## Median_annual_income._..2.5 + : data is Spatial* object, ignoring coords
## argument
## Adaptive q: 0.381966 CV score: 0.03343324 
## Adaptive q: 0.618034 CV score: 0.03528038 
## Adaptive q: 0.236068 CV score: 0.03094752 
## Adaptive q: 0.145898 CV score: 0.03079962 
## Adaptive q: 0.1784092 CV score: 0.02984946 
## Adaptive q: 0.189451 CV score: 0.02991537 
## Adaptive q: 0.1802365 CV score: 0.02982284 
## Adaptive q: 0.1825907 CV score: 0.02980696 
## Adaptive q: 0.183218 CV score: 0.02981068 
## Adaptive q: 0.1822075 CV score: 0.02980534 
## Adaptive q: 0.1814547 CV score: 0.02980813 
## Adaptive q: 0.1820962 CV score: 0.02980496 
## Adaptive q: 0.1818512 CV score: 0.02980428 
## Adaptive q: 0.1816997 CV score: 0.02980546 
## Adaptive q: 0.1819448 CV score: 0.02980452 
## Adaptive q: 0.1817933 CV score: 0.02980447 
## Adaptive q: 0.1818919 CV score: 0.02980438 
## Adaptive q: 0.1818512 CV score: 0.02980428
GWRbandwidthtrans
## [1] 0.1818512
# run the GWR model
gwr.modeltrans = gwr(Percentage_of._Obese_Children_in_Year_6 ~ Median_annual_income._..2.5 + 
                       log_Population_per_square_km +
                       Percentage_of_homes_with_deficiency_in_access_to_nature +
                       Fast_food_outlets_per_100_residents_..1,
                  data = LonBoroughsSPtrans,
                coords=coordsBSP,
                adapt=GWRbandwidthtrans,
                hatmatrix = TRUE,
                se.fit = TRUE)
## Warning in gwr(Percentage_of._Obese_Children_in_Year_6 ~
## Median_annual_income._..2.5 + : data is Spatial* object, ignoring coords
## argument
## Warning in proj4string(data): CRS object has comment, which is lost in output
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition

Let’s print the results of the GWR model:

gwr.modeltrans
## Call:
## gwr(formula = Percentage_of._Obese_Children_in_Year_6 ~ Median_annual_income._..2.5 + 
##     log_Population_per_square_km + Percentage_of_homes_with_deficiency_in_access_to_nature + 
##     Fast_food_outlets_per_100_residents_..1, data = LonBoroughsSPtrans, 
##     coords = coordsBSP, adapt = GWRbandwidthtrans, hatmatrix = TRUE, 
##     se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.1818512 (about 6 of 33 data points)
## Summary of GWR coefficient estimates at data points:
##                                                                Min.     1st Qu.
## X.Intercept.                                            -1.1758e-01 -5.8218e-02
## Median_annual_income._..2.5                              5.8826e+09  7.1845e+09
## log_Population_per_square_km                             2.6732e-02  4.3915e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  4.8259e-02  6.3068e-02
## Fast_food_outlets_per_100_residents_..1                 -6.8343e-03 -6.2540e-03
##                                                              Median     3rd Qu.
## X.Intercept.                                            -1.8931e-02  1.4746e-02
## Median_annual_income._..2.5                              8.5226e+09  9.4882e+09
## log_Population_per_square_km                             5.5738e-02  6.6814e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  7.6677e-02  8.8804e-02
## Fast_food_outlets_per_100_residents_..1                 -3.2608e-03 -1.4918e-03
##                                                                Max.     Global
## X.Intercept.                                             8.4562e-02 -3.910e-02
## Median_annual_income._..2.5                              1.1447e+10  1.044e+10
## log_Population_per_square_km                             8.3678e-02  5.940e-02
## Percentage_of_homes_with_deficiency_in_access_to_nature  9.8878e-02  7.540e-02
## Fast_food_outlets_per_100_residents_..1                  6.6187e-04 -4.900e-03
## Number of data points: 33 
## Effective number of parameters (residual: 2traceS - traceS'S): 14.50372 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 18.49628 
## Sigma (residual: 2traceS - traceS'S): 0.02021397 
## Effective number of parameters (model: traceS): 11.35138 
## Effective degrees of freedom (model: traceS): 21.64862 
## Sigma (model: traceS): 0.01868438 
## Sigma (ML): 0.01513341 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): -141.4577 
## AIC (GWR p. 96, eq. 4.22): -171.5948 
## Residual sum of squares: 0.007557667 
## Quasi-global R2: 0.8340129
resultstrans <- as.data.frame(gwr.modeltrans$SDF)
names(resultstrans)
##  [1] "sum.w"                                                         
##  [2] "X.Intercept."                                                  
##  [3] "Median_annual_income._..2.5"                                   
##  [4] "log_Population_per_square_km"                                  
##  [5] "Percentage_of_homes_with_deficiency_in_access_to_nature"       
##  [6] "Fast_food_outlets_per_100_residents_..1"                       
##  [7] "X.Intercept._se"                                               
##  [8] "Median_annual_income._..2.5_se"                                
##  [9] "log_Population_per_square_km_se"                               
## [10] "Percentage_of_homes_with_deficiency_in_access_to_nature_se"    
## [11] "Fast_food_outlets_per_100_residents_..1_se"                    
## [12] "gwr.e"                                                         
## [13] "pred"                                                          
## [14] "pred.se"                                                       
## [15] "localR2"                                                       
## [16] "X.Intercept._se_EDF"                                           
## [17] "Median_annual_income._..2.5_se_EDF"                            
## [18] "log_Population_per_square_km_se_EDF"                           
## [19] "Percentage_of_homes_with_deficiency_in_access_to_nature_se_EDF"
## [20] "Fast_food_outlets_per_100_residents_..1_se_EDF"                
## [21] "pred.se.1"

Moran’s I for GWR:

gwr.morantest(gwr.modeltrans, LB.queens_weight)
## Warning in gwr.morantest(gwr.modeltrans, LB.queens_weight): LB.queens_weightnot
## row standardised
## 
##  Leung et al. 2000 three moment approximation for Moran's I
## 
## data:  GWR residuals
## statistic = 1106.5, df = 1130.9, p-value = 0.3074
## sample estimates:
##           I 
## -0.09815888
gwr.morantest(gwr.modeltrans, LB.knn_4_weight)
## Warning in gwr.morantest(gwr.modeltrans, LB.knn_4_weight): LB.knn_4_weightnot
## row standardised
## 
##  Leung et al. 2000 three moment approximation for Moran's I
## 
## data:  GWR residuals
## statistic = 4305.5, df = 4329.7, p-value = 0.3999
## sample estimates:
##          I 
## -0.1235569

Monte Carlo approach:

res.mont <- gwr.montecarlo(Percentage_of._Obese_Children_in_Year_6 ~ Median_annual_income._..2.5 + 
                             log_Population_per_square_km +
                             Percentage_of_homes_with_deficiency_in_access_to_nature +
                             Fast_food_outlets_per_100_residents_..1,
                           data = LonBoroughsSPtrans, bw = 30, adaptive = TRUE,
                          nsims = 95, kernel="gaussian")
## 
## Tests based on the Monte Carlo significance test
## 
##                                                         p-value
## (Intercept)                                              0.5521
## Median_annual_income._..2.5                              0.5938
## log_Population_per_square_km                             0.4688
## Percentage_of_homes_with_deficiency_in_access_to_nature  0.5833
## Fast_food_outlets_per_100_residents_..1                  0.1042
bw <- bw.gwr(Percentage_of._Obese_Children_in_Year_6 ~ Median_annual_income._..2.5 + 
         log_Population_per_square_km +
         Percentage_of_homes_with_deficiency_in_access_to_nature +
         Fast_food_outlets_per_100_residents_..1,
       data = LonBoroughsSPtrans,
       approach = "AIC", kernel="gaussian")
## Fixed bandwidth: 28625 AICc value: -146.554 
## Fixed bandwidth: 17694.76 AICc value: -146.4194 
## Fixed bandwidth: 35380.26 AICc value: -146.5028 
## Fixed bandwidth: 24450.02 AICc value: -146.5747 
## Fixed bandwidth: 21869.74 AICc value: -146.5653 
## Fixed bandwidth: 26044.72 AICc value: -146.5697 
## Fixed bandwidth: 23464.44 AICc value: -146.5745 
## Fixed bandwidth: 25059.14 AICc value: -146.5735 
## Fixed bandwidth: 24073.56 AICc value: -146.575 
## Fixed bandwidth: 23840.9 AICc value: -146.5749
res.mont
##                                                           p-value
## (Intercept)                                             0.5520833
## Median_annual_income._..2.5                             0.5937500
## log_Population_per_square_km                            0.4687500
## Percentage_of_homes_with_deficiency_in_access_to_nature 0.5833333
## Fast_food_outlets_per_100_residents_..1                 0.1041667
bw
## [1] 24073.56

Attach coefficients to original SF and plot them:

LonBoroughs2 <- LonBoroughstrans %>%
  mutate(coefMedinc = resultstrans$Median_annual_income._..2.5,
         coefPopdens = resultstrans$log_Population_per_square_km ,
         coefaccnat = resultstrans$Percentage_of_homes_with_deficiency_in_access_to_nature,
         coeffastfood = resultstrans$Fast_food_outlets_per_100_residents_..1)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(LonBoroughs2) +
  tm_polygons(col = "coefMedinc", palette = "RdBu", alpha = 0.6)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "coefPopdens", palette = "RdBu", alpha = 0.6)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "coefaccnat", palette = "RdBu", alpha = 0.6)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "coeffastfood", palette = "RdBu", alpha = 0.6, midpoint = NA)

Run the significance test:

sigTest1 = abs(gwr.modeltrans$SDF$Median_annual_income._..2.5)-2 * gwr.modeltrans$SDF$Median_annual_income._..2.5_se
sigTest2 = abs(gwr.modeltrans$SDF$log_Population_per_square_km)-2 * gwr.modeltrans$SDF$log_Population_per_square_km_se
sigTest3 = abs(gwr.modeltrans$SDF$Percentage_of_homes_with_deficiency_in_access_to_nature)-2 * gwr.modeltrans$SDF$Percentage_of_homes_with_deficiency_in_access_to_nature_se
sigTest4 = abs(gwr.modeltrans$SDF$Fast_food_outlets_per_100_residents_..1)-2 * gwr.modeltrans$SDF$Fast_food_outlets_per_100_residents_..1_se

Finally, let’s plot significance results:

tmap_mode("view")
## tmap mode set to interactive viewing
LonBoroughs2 <- LonBoroughs2 %>%
  mutate(GWRMedincSig = sigTest1)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "GWRMedincSig", palette = "RdYlBu", alpha = 0.6)
LonBoroughs2 <- LonBoroughs2 %>%
  mutate(GWRPopdensSig = sigTest2)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "GWRPopdensSig", palette = "RdYlBu", alpha = 0.6)
## Variable(s) "GWRPopdensSig" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
LonBoroughs2 <- LonBoroughs2 %>%
  mutate(GWRAccNatSig = sigTest3)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "GWRAccNatSig", palette = "RdYlBu", alpha = 0.6)
## Variable(s) "GWRAccNatSig" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
LonBoroughs2 <- LonBoroughs2 %>%
  mutate(GWRFastFoodSig = sigTest4)
tm_shape(LonBoroughs2) +
  tm_polygons(col = "GWRFastFoodSig", palette = "RdYlBu", alpha = 0.6)
## Variable(s) "GWRFastFoodSig" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
sigTest1
##  [1] 6494570199 5818341549 6762209986 7129981151 6767681807 5410683104
##  [7] 7112704136 6523130404 5626003482 5298905269 3152025249 4253204482
## [13] 5388131347 5093035166 5965748507 4720371552 1992250156 3286361570
## [19] 5886135686 6983030640 4768454306 4205642723 5265397869 4459674989
## [25] 3849698529 3584439046 3413860658 2765026616 2073460230 3214685019
## [31] 2668561962 4051838656 2572724311
sigTest2
##  [1]  0.0347850425  0.0300517886  0.0309126669  0.0243395460  0.0167735001
##  [6]  0.0165918546  0.0144861157  0.0100603739  0.0108721879 -0.0009156378
## [11]  0.0140948234  0.0103522636  0.0218435683  0.0191356370  0.0268009181
## [16] -0.0013939580 -0.0225857975 -0.0021002933  0.0330883348  0.0352208948
## [21]  0.0300368369  0.0241308783  0.0208530941  0.0134662125  0.0023928451
## [26] -0.0142431500 -0.0129705919 -0.0234317865 -0.0299615283 -0.0197936680
## [31] -0.0093990448  0.0098240438 -0.0269630076
sigTest3
##  [1] -0.0029793835  0.0106136068  0.0319513093 -0.0059333961 -0.0165648738
##  [6]  0.0320912495 -0.0029596374  0.0008485076 -0.0043641512  0.0052387110
## [11]  0.0167737872  0.0279888193  0.0322521525  0.0309353710  0.0375389898
## [16]  0.0121025556 -0.0038385365  0.0125461474  0.0038766795 -0.0232631727
## [21] -0.0054203113  0.0102965356  0.0201600532  0.0219684156  0.0211505532
## [26]  0.0082529576  0.0139292702  0.0052628911 -0.0010450701  0.0048096915
## [31]  0.0071421998  0.0242401052  0.0079773284
sigTest4
##  [1]  0.0022801577  0.0016918104 -0.0004060366  0.0024515146  0.0021396329
##  [6] -0.0042396357  0.0025725693  0.0020834228  0.0011460851  0.0005238704
## [11] -0.0006373207 -0.0017108672 -0.0015809347 -0.0040199069 -0.0037847682
## [16] -0.0009363634 -0.0044756062 -0.0051427488  0.0021712747  0.0015936867
## [21]  0.0018581773  0.0002777879 -0.0003014672 -0.0013637869 -0.0022246244
## [26] -0.0029620626 -0.0032366964 -0.0041755585 -0.0043642471 -0.0032649394
## [31] -0.0055761543 -0.0050048465 -0.0039134360